/*******************************************************************************************************************
 *______________________________________Number of Exchanges at the Best Price______________________________________*
 *******************************************************************************************************************/

/*
PURPOSE: 

    The goal of this script is to to produce a symbol-date-#_of_exchange level dataset 
    of the total duration (in milliseconds) any number of exchanges were at the Best Bid or Offer
    (among the top 8 exchanges).
    To be clear, the BBO caculated here is not the NBBO, but rather the BBO calculated among
    the Top 8 exchanges.
    
    Note that this script is meant to be used for the Top 8 exchanges only.
    
    For example for AAPL on January 2, there will be 9 rows. For each row i, 
    the dataset records the total duration #i exchanges that were at the BBO.
    For example in row 3, the total number of milliseconds in which 3 exchanges were at the BBO is recorded.

PRIMARY INPUT: 
    
    The Daily TAQ Quotes file is a dataset where a row is a single quote. Each row contains
    the date the quote was executed, the timestamp (millisecond precision before 8/3/2015,
    microsecond precision afterwards), the symbol root (name) and symbol suffix (equity class),
    the exchange where the quote was executed, the bid, bid size, ask, ask size,
    and the quote condition code that describes the context of the quote (e.g. an opening quote,
    regular quote, slow on the ask side, etc.).
        
APPROACH:

    The approach can be summarized as follows:

    STEP 1: We load the quotes for a single day and drop the quotes that were not from the 
    Top 8 exchanges, were made outside of regular trading hours (adjusted for halfdays), 
    and were made during holidays. Out of this set of quotes we define quotes satisfying the following 
    conditions as regular quotes. Only regular quotes are eligible for the BBO.
    
    1. The quote had a valid record and was not been cancelled or corrected.
    2. The quote is labelled as "regular, two-sided" by the quote sale condition code.

    STEP 2: We group the quotes by date, symbol, ms and exchange.
    At every ms, we compute the BBO as the maximum bid and minimum ask among the 
    current quotes as of the end ms. 
    
    STEP 3: We evaluate how many exchanges are at the BBO.
    
    STEP 4: We compute the total duration a given number (from 0 to 8) of exchanges are at the BBO.

    The code is structured as a large loop over each quote in the day, so although the algorithm
    can be conceptually broken down into three steps (calculate BBO, num exchanges at BBO, add duration), 
    all of these calculations are updated as we process quotes.

    The output is a symbol-date-exchange level dataset of the number of milliseconds each
    exchange is at the BBO for the symbol for a given day. 
    This is repeated for every day for every month in 2015.
    
SUPPLEMENTAL INPUT FILES: 
    
    "Server_Directories.xlsx"
    "Holidays.xlsx"
    "Symbol_Universe_2015" (this is produced after running the Sample Selection procedure)

INSTRUCTION:

    Find the wrapper "Num_Exchanges_at_BBO.sh". Make sure that in this file, the argument after
    "-sysparm" is "2015". Submit the wrapper on the WRDS server.
        
*/

/****************************************************************************************
 *______________________________________INITIALIZE______________________________________*
 ****************************************************************************************/

/*
 * WRDS LIBRARY:
 *
 * This sets up the wharton library, which allows us to access the CRSP and TAQ datasets.
 */
%include '/wrds/lib/utility/wrdslib.sas';
options mprint;

/* This allows the "in" operator to be used inside of macros. */
options minoperator;

/*
 * IMPORT INPUT PARAMETERS:
 * 
 * Make sure to enter the path to your input directory below.
 */
proc import datafile="/home/uchicago/eche/sasuser.v94/WRDS/Input/Server_Directories" out= server_directories dbms = xlsx replace;
run;
proc import datafile="/home/uchicago/eche/sasuser.v94/WRDS/Input/Holidays" out= holidays dbms = xlsx replace;
run;

data holiday_import;
    set holidays;
    
    if holiday = "New_Years" then do; call symput('New_Years', trim(date)); end;
    if holiday = "MLK_Day" then do; call symput('MLK_Day', trim(date)); end;
    if holiday = "Presidents_Day" then do; call symput('Presidents_Day', trim(date)); end;
    if holiday = "Good_Friday" then do; call symput('Good_Friday', trim(date)); end;
    if holiday = "Memorial_Day" then do; call symput('Memorial_Day', trim(date)); end;
    if holiday = "Independence_Day" then do; call symput('Independence_Day', trim(date)); end;
    if holiday = "Labor_Day" then do; call symput('Labor_Day', trim(date)); end;
    if holiday = "Thanksgiving" then do; call symput('Thanksgiving', trim(date)); end;
    if holiday = "Christmas" then do; call symput('Christmas', trim(date)); end;
    if holiday = "Hurricane_Sandy" then do; call symput('Hurricane_Sandy', trim(date)); end;
    if holiday = "Halfdays" then do; call symput('Halfdays', trim(date)); end;
run;

data directory_import;
    set server_directories;
    
    if directory = "user_directory" then do; call symput('userdir', trim(path)); end;
    if directory = "volume_directory" then do; call symput('voldir', trim(path)); end;
    if directory = "temp_directory" then do; call symput('tempdir', trim(path)); end;
run;

libname userdir &userdir;
libname voldir &voldir;
libname tempdir &tempdir;

/* Set user_path for import/export .
 * We need to dequote the path passed in from "Server_Directories.xlsx".
 */
%let user_path  = %qsysfunc(dequote(&userdir));

/**********************************************************************************************
 *______________________________________GLOBAL VARIABLES______________________________________*
 **********************************************************************************************/

/* Obtain year from the wrapper */
%let year_input = &SYSPARM;

/* Trade beginning and end times. */
%let trade_begin_time = "09:30:00.000"t;
%let trade_end_time_reg = "16:00:00.000"t;
%let trade_end_time_halfday = "13:00:00.000"t;

/* Set constant to convert seconds to milliseconds */
%let seconds_to_milliseconds = 1000;
%let micro_to_milliseconds = 1000;

/* Set constant for rounding to milliseconds. */
%let millisecond_precision = 0.001;

/*
 * Exchanges:
 * 
 * "Top_5_Exchanges" is the list of exchanges which are included 
 * when calculating the Best Bid or Offer.
 * If you want to restrict to the top 5 or enlarge the exchange pool, 
 * then change the exchange codes in "Top_5_Exchanges".
 * 
 * The following lists which code belongs to each exchange:
 * 
 * Top 5:
 * 
 * 'N': NYSE
 * 'P': NYSE Arca
 * 'T': Nasdaq (Tape A and B)
 * 'Z': Bats BZX
 * 'K': Bats EDGX
 * 'Q': Nasdaq (Tape C, all the volume under this exchange 
 * is moved to 'T' during this calculation)
 * 
 * Other Exchanges with non-zero trading in 2015:
 * 'A': NYSE Market
 * 'X': Nasdaq PSX
 * 'M': CHX
 */
%global Top_5_Exchanges;
%let Top_5_Exchanges = ('N','P','T','Z','K');

/*
 * "exchange_index_string" is used to index the arrays when storing the 
 * bids and asks at each exchange.
 * 
 * This means that in the bid array, for example, the first entry will be NYSE's 
 * bid since the first code in the string ('N') is the code for NYSE.
 * 
 * MAKE SURE: The exchange codes for the Top 5 Maker-Taker exchanges are 
 * the first five exchange codes in the "exchange_index_string".
 * 
 * However, any exchange specified here should also be present in "Top_5_Exchanges", 
 * because otherwise it will not appear at all in the output.
 * 
 * NOTE: 'Q' is in "Top_5_Exchanges" but not in "exchange_index_string". 
 * This is because we will combine 'T' and 'Q' (both NASDAQ) into 'T'.
 */
%global exchange_index_string;
%let exchange_index_string = NPTZK;

/* Number of exchanges in the exchange code string. */
%global nEx;
%let nEx = %length(&exchange_index_string);

/***********************************************************************************
 *______________________________________MACRO______________________________________*
 ***********************************************************************************/

/*
 * _MACRO_ : Daily_Ex_at_BBO(YYYYMMDD)
 * 
 * PARAM: YYYYMMDD: The day for which this macro calculates how long a given number 
 *  of exchanges were at the BBO.
 *
 * This Macro first loads the Daily TAQ Quotes file and restricts the data 
 * to quotes that occur during regular trading hours, not on a holiday, and
 * from a specified set of exchanges. We then label quotes as regular quotes
 * if they have the "regular" quote condition code and the "Not a cancel quote"
 * quote correction code. We only use regular quotes when determining the Best Bid and Offer.
 * 
 * After we've filtered the quotes dataset, we group them by date, symbol, ms,
 * and exchange.
 * 
 * [a] Duration that number of exchanges at previous ms:
 *     First, we calculate the duration (in ms) i exchanges were at the BBO (for every i
 *     from 0-8). (This would be STEP 3 in the header). We start with this as it is easier to calculate time 
 *     from previous in SAS than it is to calculate time to next. 
 * 
 *     To calculate this duration at the first quote of the ms, we measure the duration between
 *     this ms and the previous one (if this is the first ms we skip this step).
 *     If a valid BBO exists (two sided, not locked or crossed) then we check
 *     how many exchanges were at the BBO. We add the duration to the
 *     "duration" arrays in the slot indexed by the number of exchanges.
 * 
 *     At the very last millisecond of the day, we compute the duration as the 
 *     duration between the last millisecond of quoting and the end of the trading day.
 * 
 * [b] Calculate BBO:
 *     Second, for every quote in the ms, we update the current bid/ask 
 *     for the exchange in the quote. For irregular quotes, assign the bid/ask to null.
 *     Then we update the current BBO as the maximum bid and minimum ask of all 
 *     prevailing quotes. This is STEP 1 of the header.
 *
 * [c] Exchanges at BBO:
 *     Third, at the end of the ms, we consider the current BBO as of the end of the ms 
 *     as the BBO for the ms. We evaluate whether each exchange's current quote as of end
 *     of the ms is at the BBO. This is STEP 2 of the header.
 *
 * At the end, we print 9 rows for each symbol-date. Each row i contains various measures
 * of the duration i exchanges were at the BBO.
 * 
 * OUTPUT: Day_NumExAtBBO_&YYYYMMDD, Symbol-date-#_of_exchanges level dataset of the number of exchanges at the BBO for day YYYYMMDD
 */
%Macro Daily_Ex_at_BBO(YYYYMMDD)/mindelimiter=',';
    
    /*
     * Halfday correction:
     * 
     * Using the list of halfdays in the "Holidays.xlsx" input file, 
     * we identify whether the current day is a halfday or not.
     * If so, then we change the end of the trading day to 1:00 pm.
     */
    %let DDMMYYYY = "%sysfunc(inputn(&YYYYMMDD, yymmdd10.), date9.)"d;   
    %if &DDMMYYYY in &halfdays %then %do;
        %let trade_end_time = &trade_end_time_halfday;
    %end;
    %else %do;
        %let trade_end_time = &trade_end_time_reg;
    %end;
    
    /*
     * f_Exchanges
     * 
     * We only include quotes made by exchanges in the list in "Top_5_Exchanges".
     */
    %let f_Exchanges = ex in &Top_5_Exchanges;
    
    /* f_Holiday
     * 
     * We remove holidays from our sample.
     */
    %let f_New_Years_Day = date in &New_Years;
    %let f_MLK_Day=date in &MLK_Day;
    %let f_Presidents_Day=date in &Presidents_Day;
    %let f_Good_Friday=date in &Good_Friday;
    %let f_Memorial_Day=date in &Memorial_Day;
    %let f_Indpdnce_Day=date in &Independence_Day;
    %let f_Labor_Day=date in &Labor_Day;
    %let f_Thanksgiving=date in &Thanksgiving;
    %let f_Christmas_Day=date in &Christmas;
    
    %let f_Holiday = (&f_New_Years_Day or
                    &f_MLK_Day or
                    &f_Presidents_Day or
                    &f_Good_Friday or
                    &f_Memorial_Day or
                    &f_Indpdnce_Day or
                    &f_Labor_Day or
                    &f_Thanksgiving or
                    &f_Christmas_Day);

    /*
     * f_Regular_Hours
     * 
     * We only include quotes that occur during regular trading hours.
     */
    %let f_Regular_Hours = time_m >= &trade_begin_time and time_m < &trade_end_time;
    
    /*
     * INPUT: taqmsec.cqm_&YYYYMMDD, Daily TAQ Quotes file
     * 
     * Filters out the Daily TAQ Quotes File according to the conditions set above.
     * Identifies regular quotes with "f_Regular_Quote."
     * 
     * OUTPUT: quotes, Daily TAQ Quotes File with quote flags
     */   
    data quotes;
        set taqmsec.cqm_&YYYYMMDD.;
        where ((&f_Regular_Hours) AND (&f_Exchanges) and (~&f_Holiday));
      
        /* We move all the NASDAQ Quotes into one exchange code. */
        if ex = 'Q' then ex = 'T';
        
        /* f_Regular_Condition:
         * Quotes with the quote condition "R": Regular, two-sided quotes.
         */
        f_Regular_Condition = qu_cond ='R';
        
        /* Convert microsecond timestamp to milliseconds*/
        time_m = floor(time_m * &micro_to_milliseconds) / &micro_to_milliseconds;
        
        /* f_Valid_Record
         * 
         * qu_cancel:
         * "" = Not a cancel quote (CTA Only)
         * "A" = Not a cancel quote (UTP Only)
         */
        f_Valid_Record = qu_cancel in ("", "A");
        
        /* We define a regular quote to be a regular, two-sided quote that has a valid record. */
        f_Regular_Quote = f_Regular_Condition and f_Valid_Record;
    
        format date yymmdd10.;
        format time_m time12.3;
        
        keep date time_m ex sym_root sym_suffix bid ask bidsiz asksiz qu_cond;
    run;
    
    /* 
     * We sort the quotes by symbol, date, ms, and exchange.
     *
     * Note: Hasbrouck (2010) points out that the BBO calculation can be wrong 
     * if the sequence of quotes within an exchange is changed.
     * 
     * The EQUALS option for proc sort preserves the order of the original dataset
     * within the by groups. EQUALS is default for proc sort but we
     * specify it here for clarity.
     */  
    proc sort data = quotes out = quotes equals;
      by sym_root sym_suffix date time_m ex;
    run;
   
   /*
     * INPUT: quotes, Daily TAQ Quotes File with quote flags
     * 
     * We group the quotes dataset by symbol, date, ms, and exchange.
     * We then compute the BBO, find the number of exchanges at the BBO, and calculate the total duration
     * a given number of exchanges were at the BBO.
     * 
     * The output dataset will have a row, for example, containing the total number of milliseconds 
     * 3 exchanges were at the BBO for AAPL on January 2, 2015.
     * 
     * OUTPUT: NumExAtBBO_&YYYYMMDD, symbol-date-#_of_exchanges level data containing 
     * the duration that any number of exchanges were at the BBO for a symbol-date pair.
     */
   data NumExAtBBO_&YYYYMMDD.;
        set quotes;

        by sym_root sym_suffix date time_m ex;
      
        /**
         * INPUT: 1 row for every symbol-date-exchange quote
         * Data step: First we iterate by symbol. For each symbol we iterate by date.
         *  Then within each day, we iterate by millisecond. Within the millisecond, we iterate by quote.
         * OUTPUT: 1 row for every possible number of exchanges that could be at the BBO, per symbol-date pair (symbol-date - #_of_exchanges)
         */
        
        /**
         * ______________________________________INITIALIZATION______________________________________
         */
        
        /**       
         * exBid(nEx): the current bid at exchange i
         * exAsk(nEx): the current ask at exchange i
         * exAtBBid(nEx): 1 if exchange i is at the best bid, 0 if not
         * exAtBAsk(nEx): 1 if exchange i is at the best ask, 0 if not
         * duration_NumAtBB_8main(9): records the total duration that i total exchanges 
         * are at the best bid (best bid among the Top 8 exchanges)
         * duration_NumAtBO_8main(9): records the total duration that i many exchanges 
         * are at the best ask (best ask among the Top 8 exchanges)
         * duration_NumAtBB_5main(9): records the total duration that i many exchanges 
         * are at the best bid (best bid among the Top 8 exchanges)
         * duration_NumAtBO_5main(9): records the total duration that i many exchanges 
         * are at the best ask (best ask among the Top 8 exchanges)
         */
      
        array exBid(&nEx);
        array exAsk(&nEx);
        array exAtBBid(&nEx);
        array exAtBAsk(&nEx);
        array duration_NumAtBB_8main(9);
        array duration_NumAtBO_8main(9);
        array duration_NumAtBB_5main(9);
        array duration_NumAtBO_5main(9);
      
        retain exBid1-exBid&nEx exAsk1-exAsk&nEx 
            exAtBBid1-exAtBBid&nEx exAtBAsk1-exAtBAsk&nEx 
            BBid BAsk 
            duration_NumAtBB_8main1-duration_NumAtBB_8main9 
            duration_NumAtBO_8main1-duration_NumAtBO_8main9 
            duration_NumAtBB_5main1-duration_NumAtBB_5main9
            duration_NumAtBO_5main1-duration_NumAtBO_5main9
            num_exAtBBid_main8 num_exAtBAsk_main8 num_exAtBBid_main5 num_exAtBAsk_main5
            lockedMarket noTwoSideMarket duration_locked duration_noTwoSide
            prevLockedMarket prevNoTwoSideMarket prevTime time_firstQuote .;
        /**
         * ______________________________________FIRST MILLISECOND QUOTE OF THE DAY______________________________________
         */
         
        /* Reset Variables */
        if first.date then do;
        
            /* Reset arrays that save duration a number of exchanges were at the BBO */
            do j = 1 to 9;
                duration_NumAtBB_8main(j) = 0;
                duration_NumAtBO_8main(j) = 0;
                duration_NumAtBB_5main(j) = 0;
                duration_NumAtBO_5main(j) = 0;
            end;
            drop j;
        
           /* We reset the exBid and exAsk arrays, 
            * which record the current bid and ask at an exchange for a given symbol */  
            do i=1 to &nEx;
                exBid(i) = .;
                exAsk(i) = .;
                exAtBBid(i) = .;
                exAtBAsk(i) = .;      
            end;
            drop i;
       
            num_exAtBBid_main8 = .;
            num_exAtBAsk_main8 = .;
            num_exAtBBid_main5 = .;
            num_exAtBAsk_main5 = .;
            lockedMarket = .;
            noTwoSideMarket = .;
            prevLockedMarket = .;
            prevNoTwoSideMarket = .;
            duration_locked = 0;
            duration_noTwoSide = 0;
            prevTime = time_m;
            time_firstQuote = time_m;        
        end;
        
        /**
         * ______________________________________FOR EVERY QUOTE______________________________________
         */
          
        /* If the quote is not a regular quote, then we set the bid and the ask to null. */
        if f_Regular_Quote = 0 then do;
            bid = .;
            bidsiz = 0;
            ask = .;
            asksiz = 0;
        end;
        
        /* 
         * Set negative, null, or zero bids/asks to null. 
         * Set the sizes of such bids/asks to 0 too.
         * 
         * Bids, bidsizes, asks, and asksizes have not been observed to be negative in the quotes dataset
         * for a random sample of days (20150812, 20150206, and 20151221).
         * We do this to ensure that the if such quotes arrive, they are ineligible for the BBO
         */
        if bid <= 0 or bidsiz <= 0 or bid = . then do;
            bid = .;
            bidsiz = 0;
        end;
        if ask <= 0 or asksiz <= 0 or ask = . then do;
            ask = .;
            asksiz = 0;
        end;
        
        /**
         * ______________________________________FIRST QUOTE OF THE MILLISECOND______________________________________
         */
        
        /*
         * [a] Duration that number of exchanges at previous ms:
         * 
         * At the beginning of the millisecond we record the duration 
         * between this millisecond and the previous millisecond
         */             
        IF first.time_m AND not first.date THEN DO;         
            
            duration = round(time_m - prevTime, &millisecond_precision) * &seconds_to_milliseconds;         
            /*
             * If there was a valid BBO in the previous ms, we look up how many exchanges
             * were at the BBO in the previous ms and add the duration to the corresponding entry in the
             * number of exchanges at BBO arrays.
             * 
             * If there wasn't a valid BBO because the market was locked, crossed or missing a best bid 
             * or ask at the previous ms, we add this duration to "duration_LockedOrCrossed" 
             * or "duration_noTwoSide" instead.
             */
            if prevLockedMarket = 1 OR prevNoTwoSideMarket = 1 then do;
                duration_locked = duration_locked + prevLockedMarket*duration;
                duration_noTwoSide = duration_noTwoSide + prevNoTwoSideMarket*duration;             
            end;
            else do; 
                /*
                 * We look at the number of exchanges that were at the best bid or offer in the previous millisecond, call this number i.
                 * We add the duration between the previous millisecond and this millisecond
                 * To the duration arrays, at position i.
                 * 
                 * Because SAS does not have a 0 index for arrays, we assign the 9th index in the duration arrays
                 * To keep track of the total duration that 0 exchanges were at the BBO
                 */
                
                /* All Top 8 */
                IF num_exAtBBid_main8 = 0 THEN duration_NumAtBB_8main(9) = duration_NumAtBB_8main(9) + duration;
                ELSE duration_NumAtBB_8main(num_exAtBBid_main8) = duration_NumAtBB_8main(num_exAtBBid_main8) + duration;
              
                IF num_exAtBAsk_main8 = 0 THEN duration_NumAtBO_8main(9) = duration_NumAtBO_8main(9) + duration;
                ELSE duration_NumAtBO_8main(num_exAtBAsk_main8) = duration_NumAtBO_8main(num_exAtBAsk_main8) + duration;
                
                /* Top 5 only */
                IF num_exAtBBid_main5 = 0 THEN duration_NumAtBB_5main(9) = duration_NumAtBB_5main(9) + duration;
                ELSE duration_NumAtBB_5main(num_exAtBBid_main5) = duration_NumAtBB_5main(num_exAtBBid_main5) + duration;
              
                IF num_exAtBAsk_main5 = 0 THEN duration_NumAtBO_5main(9) = duration_NumAtBO_5main(9) + duration;
                ELSE duration_NumAtBO_5main(num_exAtBAsk_main5) = duration_NumAtBO_5main(num_exAtBAsk_main5) + duration;
            END;
        END;
        
        /**
         * ______________________________________Calculate BBO WITHIN THE MILLISECOND______________________________________
         */
        
        /*
         * [b] Calculate BBO:
         * 
         * Within the millisecond, we run through all the quotes and 
         * store the most recent bid and ask in the "exBid" and "exAsk" arrays.
         * With every new quote, we update these arrays and then update the Best Bid 
         * and Best Ask as the running maximum of these arrays respectively.
         * 
         * kEx is used to index the bid and offer arrays, as specified in "exchange_index_string"
         */         
        kEx = index("&exchange_index_string",ex);
        
        if kEx>0 then do;      
            exBid(kEx) = bid;
            exAsk(kEx) = ask;
    
            BBid = max(of exBid1-exBid&nEx);
            BAsk = min(of exAsk1-exAsk&nEx);
        end;
        
        /*
         * noTwoSideMarket: no best bid or best ask
         * lockedMarket: if somehow the best bid is above the best ask
         */   
        noTwoSideMarket = (BBid = . OR BAsk = .);
        lockedMarket = (BBid >= BAsk)*(noTwoSideMarket = 0);
        
        /**
         * ______________________________________END OF THE MILLISECOND______________________________________
         */
        
        /*
         * [c] Exchanges at BBO:
         * 
         * An exchange is considered to be at the BBO
         * if the last quote by the exchange during the ms is at the BBO.
         *
         * We save the best bid, best ask, whether the market was locked or not two sided, 
         * and how many exchanges among the Top 8 and Top 5 were at the BBO, so in the next
         * millisecond we can weight these values by the duration and add them to the running totals.
         */  
        IF last.time_m THEN DO;              
            do i = 1 to &nEx;
                exAtBBid(i) = (exBid(i) = BBid);
                exAtBAsk(i) = (exAsk(i) = BAsk);
            end;
            
            /* The sum of exchanges at the BBO, out of the top 8 exchanges */
            num_exAtBBid_main8 = sum(of exAtBBid1-exAtBBid8);
            num_exAtBAsk_main8 = sum(of exAtBAsk1-exAtBAsk8);
           
            /* The sum of exchanges at the BBO, only among the top 5 exchanges */
            num_exAtBBid_main5 = sum(of exAtBBid1-exAtBBid5);
            num_exAtBAsk_main5 = sum(of exAtBAsk1-exAtBAsk5);
            
            /* If no valid BBO, save this information for the next millisecond */
            prevLockedMarket = lockedMarket;
            prevNoTwoSideMarket = noTwoSideMarket;
            prevTime = time_m;     
        END;
        
        /*
         * ______________________________________LAST QUOTE OF THE DAY______________________________________
         */
          
        if last.date then do;     
            /**
             * Because the trading day is now over, we record the duration between 
             * the last millisecond of activity and the end of the trading day.
             */
            duration_last = round(&trade_end_time - time_m, &millisecond_precision) * &seconds_to_milliseconds;
            
            /* We use duration_total later when we compute shares for the duration i total exchanges are at the BBO */
            duration_total = round(&trade_end_time - time_firstQuote, &millisecond_precision) * &seconds_to_milliseconds;
       
            if lockedMarket = 1 OR noTwoSideMarket = 1 then do;
                duration_locked = duration_locked + lockedMarket * duration_last;
                duration_noTwoSide = duration_noTwoSide + noTwoSideMarket * duration_last;
            end;
            else do;
                /**
                 * We look at the number of exchanges that were at the best bid or ask 
                 * at the last millisecond of activity, call that i. Then we add the duration 
                 * between the last millisecond of activity and the end of the trading day
                 * to the "duration" arrays at position i. If the number of exchanges at the BBO
                 * is 0, then we add this duration to the 9th array.
                 */
                
                /* All Top 8 */
                if num_exAtBBid_main8 = 0 then duration_NumAtBB_8main(9) = duration_NumAtBB_8main(9) + duration_last;
                else duration_NumAtBB_8main(num_exAtBBid_main8) = duration_NumAtBB_8main(num_exAtBBid_main8) + duration_last;
          
                if num_exAtBAsk_main8 = 0 then duration_NumAtBO_8main(9) = duration_NumAtBO_8main(9) + duration_last;
                else duration_NumAtBO_8main(num_exAtBAsk_main8) = duration_NumAtBO_8main(num_exAtBAsk_main8) + duration_last;
                
                /* Top 5 only */
                if num_exAtBBid_main5 = 0 then duration_NumAtBB_5main(9) = duration_NumAtBB_5main(9) + duration_last;
                else duration_NumAtBB_5main(num_exAtBBid_main5) = duration_NumAtBB_5main(num_exAtBBid_main5) + duration_last;
          
                if num_exAtBAsk_main5 = 0 then duration_NumAtBO_5main(9) = duration_NumAtBO_5main(9) + duration_last;
                else duration_NumAtBO_5main(num_exAtBAsk_main5) = duration_NumAtBO_5main(num_exAtBAsk_main5) + duration_last;
            end;
            
            /**
             * Finally, for each possible number i of exchanges at the BBO (from 0 to 8).
             * Output a row for each possible number i of exchanges (9 rows).
             */
            do j = 1 to 9;
                num_exAtBestPrice = j;
                count_NBB_main8 = duration_NumAtBB_8main(j);
                count_NBO_main8 = duration_NumAtBO_8main(j);
                count_both_main8 = duration_NumAtBB_8main(j) + duration_NumAtBO_8main(j);
                count_NBB_main5 = duration_NumAtBB_5main(j);
                count_NBO_main5 = duration_NumAtBO_5main(j);
                count_both_main5 = duration_NumAtBB_5main(j) + duration_NumAtBO_5main(j);
                output;
            end;
        end;
        else delete;
        
        keep date symbol sym_root sym_suffix num_exAtBestPrice count_NBB_main8 count_NBO_main8 count_both_main8 
               count_NBB_main5 count_NBO_main5 count_both_main5 duration_locked duration_noTwoSide duration_total;
            
    run;
   
    /*** Save the daily file to the volume directory ***/
    data voldir.Day_NumExAtBBO_top5_&YYYYMMDD.;
        set NumExAtBBO_&YYYYMMDD.;
    run;

    /* clear the work directory */
   /*
    proc datasets library = work;
        delete quotes;
        delete NumExAtBBO_&YYYYMMDD.;
    run;
    */
%Mend;

/*
 * _MACRO_ : Monthly_Ex_at_BBO(YYYYMM)
 * 
 * PARAM: YYYYMM: The month for which this macro calculates the number of exchanges at the BBO
 *
 * REQUIRES: Daily_Ex_at_BBO
 *
 * This Macro iterates through all the non-zero quoting days in a given month (denoted by YYYYMM) specified from the "TAQMSEC" library.
 * For each day it runs the Daily_Ex_at_BBO macro. Afterwards, we load all the daily files into a monthly dataset, and delete the daily files.
 *
 * OUTPUT: Month_NumExAtBBO_&YYYYMM, Symbol-date-#_of_exchanges level dataset of the number of exchanges at the BBO for month YYYYMM
 */
%Macro Monthly_Ex_at_BBO(YYYYMM);

    /*
     * INPUT: sashelp.vtable, this is a table containing a row for all available datasets.
     * There's a column for "libname", the library the dataset belongs to, and "memname", 
     * which is the name of the dataset.
     * 
     * Extracts the Daily TAQ Quotes filenames for each day in the month.
     * 
     * OUTPUT: Daily_Quotes_Filenames, list of filenames
     */
    proc sql;
        create table Daily_Quotes_Filenames
        as select 
        libname, 
        memname 
        from sashelp.vtable where 
        libname = "TAQMSEC" and memname like "CQM_&YYYYMM.%";
    quit;

    /*
     * Counts the number of filenames in Daily_Quotes_Filenames as "nobs".
     * Save a list of the filenames as "file_list", separated by "|". 
     * These are used to iterate over the valid quoting days within the month.
     */
    proc sql print;
        select count(*) into: nobs from Daily_Quotes_Filenames;
        select memname into: file_list separated by '|' from Daily_Quotes_Filenames;
    quit;

    /* Loop over daily filenames.. */ 
    %do i = 1 %to &nobs.;

        /* Get the name of the days and the daily file names in the month. */
        %let cqm_file  = %scan(%quote(&file_list.), &i., '|');
        
        /* The date is extracted from the filename by taking the content after "CTM_". */
        %let YYYYMMDD  = %substr(&cqm_file., 5, 8);

        /* Apply Daily_Ex_at_BBO Macro. */
        %Daily_Ex_at_BBO(YYYYMMDD=&YYYYMMDD.);
    %end;

    /*** Save monthly file to the volume directory. ***/
    data voldir.Month_NumExAtBBO_top5_&YYYYMM.;
        set voldir.Day_NumExAtBBO_top5_&YYYYMM.:;
    run;

    /* Delete the daily files in the volume directory. */
    proc datasets library = voldir;
        delete Day_NumExAtBBO_&YYYYMM.:;
    run;

%Mend;

/*
 * _MACRO_ : Annual_Ex_at_BBO(YYYY)
 * 
 * PARAM: YYYY: The year for which this macro calculates the number of exchanges at the BBO
 *
 * REQUIRES: Monthly_Ex_at_BBO
 *
 * This macro runs "Monthly_Ex_at_BBO" for every month in the year.
 * For 2007, we begin from October since that was when Regulation NMS was implemented.
 * 
 * OUTPUT: NumExAtBBO_&YYYY, symbol-date-#_of_exchanges level dataset
 */
%Macro Annual_Ex_at_BBO(YYYY);
    %let month_list = 01|02|03|04|05|06|07|08|09|10|11|12;
    
    /* 
     * Iterate across all months in the year for years after 2007.
     * Run Monthly_Ex_at_BBO for every month in the year.
     */
    %if (&YYYY. > 2007) %then %do;
        %do m = 1 %to 12;
            %let month = %scan(%quote(&month_list.), &m., '|');
            %Monthly_Ex_at_BBO(YYYYMM=&YYYY.&month.);
        %end;
    %end;

    /* For 2007, start data collection in october when REG NMS starts. */    
    %if (&YYYY.=2007) %then %do;
        %do m = 10 %to 12;
            %let month = %scan(%quote(&month_list.), &m., '|');
            %Monthly_Ex_at_BBO(YYYYMM=&YYYY.&month.);
        %end;
    %end;

    /*** Save annual file to the volume directory. ***/
    data voldir.NumExAtBBO_top5_&YYYY.;
        set voldir.Month_NumExAtBBO_top5_&YYYY.:;
    run;
    
     /* Save in temporary directory too */
    data tempdir.NumExAtBBO_top5_&YYYY.;
        set voldir.NumExAtBBO_top5_&YYYY.;
    run;
    
    /* Export to CSV */
    proc export data = tempdir.NumExAtBBO_top5_&YYYY.;
        outfile = "&user_path./numexatbbo_top5_raw_&YYYY.csv"
        DBMS = csv
        replace;
    run;

    /* Delete the monthly files in the volume directory. */
    proc datasets library = voldir;
        delete Month_NumExAtBBO_top5_&YYYY.:;
        run;
    quit;
    
    
%Mend;

/*
 * _MACRO_ : Sample_Universe_Merge(YYYYMM)
 * 
 * PARAM: YYYY: The month for which this macro merges NumExAtBBO and Symbol Universe.
 *
 * REQUIRES: Annual_Ex_at_BBO
 *
 * This Macro merges the yearly NumExAtBBO datasets and the Symbol Universe file.
 * Then it uses the Symbol Universe file's sample indicator to restrict the NumExAtBBO 
 * datasets to the sample used in the paper.
 */
%Macro Sample_Universe_Merge(YYYY);
    
    /* Read in Symbol Universe 2015 file. */
    data symbol_universe_&YYYY.;
        set tempdir.symbol_universe_&YYYY.;
    run;
    
    /**
     * INPUT: symbol_universe_2015, symbol level characteristics
     * 
     * Filters the symbol universe to only include the symbols in the symbol sample. 
     * Combines the symbol root and the symbol suffix into one string.
     * 
     * OUTPUT: symbol_sample, symbol level characteristics for symbols in Sample 1
     */
    data symbol_sample;
        set symbol_universe_&YYYY.;
        where f_Sample = 1;
        symbol = trim(sym_root) || trim(sym_suffix);
        
        keep sym_root sym_suffix symbol f_sample f_sample_T100 f_NYSE_listed;
    run;
    
    /* Sort symbol_universe by symbol */
    proc sort data = symbol_sample out = symbol_sample;
        by sym_root sym_suffix symbol;
    run;
    
    /**
     * INPUT: tempdir.numexatBBO_2015, symbol-date-#_of_exchanges level data of the number 
     * of milliseconds a given number of exchanges were at the BBO
     * 
     * Loads the number of exchanges at BBO dataset
     * 
     * OUTPUT: NumExAtBBO_2015, symbol-date-#_of_exchanges level data of the number of milliseconds
     * a given number of exchanges were at the BBO
     */
    data NumExAtBBO_&YYYY.;
        set voldir.numexatbbo_top5_&YYYY.;
        format date yymmdd10.;
        
        symbol = trim(sym_root) || trim(sym_suffix);
        
        /*because SAS array index starts with 1 (not 0) we store 0 as 9 */
        if num_exAtBestPrice = 9 then num_exAtBestPrice = 0;  
    run;
    
    /* Sort the number of exchanges at BBO dataset */
    proc sort data = NumExAtBBO_&YYYY. out = NumExAtBBO_&YYYY.;
        by sym_root sym_suffix symbol date num_exAtBestPrice;
    run;
    
    /**
     * INPUTA: NumExAtBBO_2015, symbol-date-#_of_exchanges level data of the number 
     * of milliseconds a given number of exchanges were at the BBO
     * INPUTB: symbol_sample, list of symbols in the sample
     * 
     * Merges the number of exchanges at BBO dataset with the symbol sample
     * 
     * OUTPUT: NumExAtBBO_Sample_2015, number of milliseconds a given number of exchanges were at the BBO
     * among symbols in the symbol sample
     */
    data NumExAtBBO_Sample_&YYYY.;
       merge symbol_sample NumExAtBBO_&YYYY. ;
       by sym_root sym_suffix symbol;
    run;

    /**
     * INPUT: NumExAtBBO_Sample_2015, symbol-date-#_of_exchanges level data of the number of milliseconds 
     * a given number of exchanges were at the BBO among symbols in the symbol sample
     * 
     * Confirms that the number of exchanges at BBO dataset is restricted to the symbol sample.
     * 
     * OUTPUT: NumExAtBBO_Sample_2015, symbol-date-#_of_exchanges level data of the number of milliseconds 
     * a given number of exchanges were at the BBO among symbols in the symbol sample
     */
    data NumExAtBBO_Sample_&YYYY.;
       set NumExAtBBO_Sample_&YYYY.;
       where f_sample = 1;
    run;
    
    /**
     * INPUT: NumExAtBBO_Sample_2015, symbol-date-#_of_exchanges level data of the number of milliseconds 
     * a given number of exchanges were at the BBO among symbols in the symbol sample
     * 
     * We calculate the percentage of all milliseconds in the trading day
     * that a particular number of exchanges were at the BBO (0 - 9).
     * 
     * We calculate the percentage when considering all the milliseconds that any
     * of the top 8 exchanges were at the BBO, and the percentage when considering
     * all of the milliseconds that any of the top 5 exchanges were at the BBO.
     * 
     * OUTPUT: NumExAtBBO_Shares_2015, symbol-date-#_of_exchanges level data of the counts
     * and shares of the milliseconds a given number of exchanges were at the BBO.
     */
    proc sql;
        create table NumExAtBBO_Shares_&YYYY.
        as select
        a.*,
        a.count_both_main8 / sum(a.count_both_main8)   as percent_BBO_Top8,
        a.count_both_main5 / sum(a.count_both_main5)   as percent_BBO_Top5
        from NumExAtBBO_Sample_&YYYY. as a
        group by date, symbol, sym_root, sym_suffix
        order by date, symbol, num_exatbestprice;
    quit;
    
    /* Export to CSV */
    proc export data = NumExAtBBO_Shares_&YYYY.
        outfile = "&user_path./numexatBBO_top5_shares_&YYYY..csv"
        DBMS = csv
        replace;
    run;

%Mend;

/* Performs the entire process */
%Macro Num_Ex_at_BBO(Year);
    %Annual_Ex_at_BBO(&Year.);
    %Sample_Universe_Merge(&Year.);
%Mend;


/**********************************************************************************
 * ______________________________________RUN______________________________________*
 **********************************************************************************/

%Daily_Ex_at_BBO(20150102);
*%Annual_Ex_at_BBO(&year_input.);
*%Sample_Universe_Merge(2015);
